home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
AmigActive 2
/
AACD 2.iso
/
AACD
/
Programming
/
fpc
/
compiler
/
aopt386.inc
< prev
next >
Wrap
Text File
|
1998-09-24
|
55KB
|
1,315 lines
{
$Id: aopt386.inc,v 1.1.1.1 1998/03/25 11:18:12 root Exp $
Copyright (c) 1993-98 by Florian Klaempfl and Jonas Maebe
This include file contains the reloading optimizer for i386+
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
****************************************************************************
}
{$Define OptimizeMovs}
{Overlay that lets a 32 bit operand field be read as two 16 bit halves. The
 routines in this file cast op2 of an instruction to TwoWords whenever the
 third operand is a register and then compare Word1 and Word2 with register
 numbers}
Type TwoWords = Record
Word1, Word2: Word
End;
Function Reg32(Reg: TRegister): TRegister;
{Yields the 32 bit register that Reg is a part of.
 - R_AX..R_DI are converted with Reg16ToReg32
 - registers above R_DI up to and including R_BL are converted with
   Reg8toReg32
 - every other register is handed back unchanged}
Begin
  If (Reg >= R_AX) And (Reg <= R_DI) Then
    Reg32 := Reg16ToReg32(Reg)
  Else If (Reg >= R_AX) And (Reg <= R_BL) Then
    Reg32 := Reg8toReg32(Reg)
  Else
    Reg32 := Reg;
End;
Function RegInRef(Reg: TRegister; Const Ref: TReference): Boolean;
{True when the 32 bit form of Reg is the base or the index register of Ref}
Var FullReg: TRegister;
Begin
  FullReg := Reg32(Reg);
  If (Ref.Base = FullReg)
    Then RegInRef := True
    Else RegInRef := (Ref.Index = FullReg)
End;
Function RegInInstruction(Reg: TRegister; p1: Pai): Boolean;
{Tells whether Reg occurs among the operands of p1, either as a register
 operand or as base/index of a memory reference. The result is False for
 anything that is not an ait_instruction. An operand 3 type other than
 Top_Reg or Top_none raises internal error $Da}
Var Hit: Boolean;
Begin
  Hit := False;
  If (Pai(p1)^.typ = ait_instruction) Then
    Begin
      {operand 1}
      If (Pai386(p1)^.op1t = Top_Reg) Then
        Hit := Reg = TRegister(Pai386(p1)^.op1)
      Else If (Pai386(p1)^.op1t = Top_Ref) Then
        Hit := RegInRef(Reg, TReference(Pai386(p1)^.op1^));
      {operand 2: when operand 3 is a register, op2 carries two register
       numbers and only its first word belongs to operand 2}
      If Not(Hit) Then
        Begin
          If (Pai386(p1)^.op2t = Top_Reg) Then
            Begin
              If (Pai386(p1)^.op3t <> Top_reg)
                Then Hit := Reg = TRegister(Pai386(p1)^.op2)
                Else Hit := longint(Reg) = twowords(Pai386(p1)^.op2).word1
            End
          Else If (Pai386(p1)^.op2t = Top_Ref) Then
            Hit := RegInRef(Reg, TReference(Pai386(p1)^.op2^))
        End;
      {operand 3: its register number sits in the second word of op2}
      If Not(Hit) Then
        Begin
          If (Pai386(p1)^.op3t = Top_Reg) Then
            Hit := longint(Reg) =twowords(Pai386(p1)^.op2).word2
          Else If (Pai386(p1)^.op3t <> Top_none) Then
            internalerror($Da)
        End
    End;
  RegInInstruction := Hit
End;
Procedure ReloadOpt(AsmL: PaasmOutput);
{MaxCh is the number of TChange slots that TAsmInstrucProp.Ch provides for
 each instruction}
Const MaxCh = 3;
{content types: the values stored in TContent.Typ}
{con_Unknown is also what DestroyReg leaves behind, since it zero-fills the
 content record}
con_Unknown = 0;
{the register was loaded from a memory reference; StartMod then points to
 the instruction that started the load}
con_ref = 1;
{the register was loaded with a constant; StartMod then holds the operand
 value itself rather than a pointer to an instruction}
con_const = 2;
{NOTE(review): no use of con_symbol is visible in this part of the file}
con_symbol = 3;
{TChange names one thing an instruction can change. The values C_EAX..C_EDI
 are converted to a TRegister through their ordinal number by TCh2Reg, so
 their order has to match that of the 32 bit registers in TRegister.
 C_Op1..C_Op3 stand for the corresponding operand of the instruction.
 NOTE(review): C_MemEDI presumably means the memory EDI points to - confirm
 against the code that consumes the AsmInstr table}
Type TChange = (C_None,
C_EAX, C_ECX, C_EDX, C_EBX, C_ESP, C_EBP, C_ESI, C_EDI,
{ C_AX, C_CX, C_DX, C_BX, C_SP, C_BP, C_SI, C_DI,
C_AL, C_CL, C_DL, C_BL,
C_AH, C_CH, C_BH, C_DH,
C_DEFAULT_SEG, C_CS, C_DS, C_ES, C_FS, C_GS, C_SS,
} C_Flags, C_FPU,
C_Op1, C_Op2, C_Op3,
C_MemEDI);
{One AsmInstr table entry: NCh is the number of changes, Ch lists them. The
 table uses NCh = 255 for instructions after which the value of no register
 is known}
TAsmInstrucProp = Record
NCh: Byte;
Ch: Array[1..MaxCh] of TChange;
End;
{What is known about the contents of one register}
TContent = Record
StartMod: Pointer; {start and end of block instructions that defines the
content of this register; If Typ = con_const, then
Longint(StartMod) = value of the constant)}
State: Word; {starts at 0, gets increased everytime the register is modified}
NrOfMods: Byte;
{ ModReg: TRegister; }{if one register gets a block assigned from an other register,
this variable holds the name of that register (so it can be
substituted when checking the block afterwards)}
Typ: Byte; {con_*}
{ CanBeDestroyed: Boolean;} {if it's a register modified by the optimizer}
End;
TRegContent = Array[R_NO..R_EDI] Of TContent;
TRegFPUContent = Array[R_ST..R_ST7] Of TContent;
{Optimizer information attached to one Pai object. CreateRegs stores a
 pointer to this record in the line field of the Pai object and keeps the
 original line number in LineSave}
TPaiProp = Record
Regs: TRegContent;
{ FPURegs: TRegFPUContent;} {currently not yet used}
LineSave: Longint;
{can this instruction be removed?}
CanBeRemoved: Boolean;
End;
PPaiProp = ^TPaiProp;
{Block of TPaiProp records. Under TP the element count is derived from 65520
 bytes divided by SizeOf(TPaiProp) rounded up to an even number; otherwise a
 fixed upper bound of 250000 elements is declared}
{$IfDef TP}
TPaiPropBlock = Array[1..(65520 div (((SizeOf(TPaiProp)+1)div 2)*2))] Of TPaiProp;
{$else}
TPaiPropBlock = Array[1..250000] Of TPaiProp;
{$EndIf TP}
PPaiPropBlock = ^TPaiPropBlock;
{Per opcode description of what an instruction changes. The array is indexed
 by tasmop, so the entries have to stay in the order of the tasmop
 declaration; the opcode names in front of the entries are comments only.
 NCh is the number of meaningful entries in Ch, and NCh = 255 marks
 instructions after which the value of no register is known.
 Entries whose NCh did not agree with their Ch list have been corrected:
 - REPE/REPNE change ECX just like REP, but had NCh = 0
 - LDS/LCS/LES/LFS/LGS/LSS list a single change, but had NCh = 2
 - FNSAVE stores to its memory operand exactly like FSAVE and is therefore
   described with C_Op1 as well}
Const AsmInstr: Array[tasmop] Of TAsmInstrucProp = (
  {MOV} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVZX} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVSX} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {LABEL} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {ADD} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {CALL} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {IDIV} (NCh: 3; Ch: (C_EAX, C_EDX, C_Flags)),
  {IMUL} (NCh: 3; Ch: (C_EAX, C_EDX, C_Flags)), {handled separately, because several forms exist}
  {JMP} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {LEA} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MUL} (NCh: 3; Ch: (C_EAX, C_EDX, C_Flags)),
  {NEG} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {NOT} (NCh: 2; Ch: (C_Op1, C_Flags, C_None)),
  {POP} (NCh: 2; Ch: (C_Op1, C_ESP, C_None)),
  {POPAD} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {PUSH} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {PUSHAD} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {RET} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {SUB} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {XCHG} (NCh: 2; Ch: (C_Op1, C_Op2, C_None)), {(will be) handled separately}
  {XOR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {FILD} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {CMP} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {JZ} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {INC} (NCh: 2; Ch: (C_Op1, C_Flags, C_None)),
  {DEC} (NCh: 2; Ch: (C_Op1, C_Flags, C_None)),
  {SETE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETL} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETG} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETLE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETGE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {JE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JL} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JG} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JLE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JGE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {OR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {FLD} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FADD} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FMUL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUB} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDIV} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCHS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLD1} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIV} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {CLTD} (NCh: 1; Ch: (C_EDX, C_None, C_None)),
  {JNZ} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {FSTP} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {AND} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {JNO} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {NOTH} (NCh: 0; Ch: (C_None, C_None, C_None)), {***???***}
  {NONE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {ENTER} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {LEAVE} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {CLD} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {MOVS} (NCh: 3; Ch: (C_ESI, C_EDI, C_MemEDI)),
  {REP} (NCh: 1; Ch: (C_ECX, C_None, C_None)),
  {SHL} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {SHR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {BOUND} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNS} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JS} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JO} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {SAR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {TEST} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {FCOM} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCOMP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCOMPP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FXCH} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FADDP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FMULP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDIVP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FNSTS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SAHF} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {FDIVRP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBRP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {SETC} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNC} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {JC} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNC} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JA} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JAE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JB} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JBE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {SETA} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETAE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETB} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETBE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {AAA} (NCh: 2; Ch: (C_EAX, C_Flags, C_None)),
  {AAD} (NCh: 2; Ch: (C_EAX, C_Flags, C_None)),
  {AAM} (NCh: 2; Ch: (C_EAX, C_Flags, C_None)),
  {AAS} (NCh: 2; Ch: (C_EAX, C_Flags, C_None)),
  {CBW} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {CDQ} (NCh: 2; Ch: (C_EAX, C_EDX, C_None)),
  {CLC} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {CLI} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {CLTS} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {CMC} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {CWD} (NCh: 2; Ch: (C_EAX, C_EDX, C_None)),
  {CWDE} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {DAA} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {DAS} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {HLT} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {IRET} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {LAHF} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {LODS} (NCh: 2; Ch: (C_EAX, C_ESI, C_None)),
  {LOCK} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {NOP} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {PUSHA} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {PUSHF} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {PUSHFD} (NCh: 1; Ch: (C_ESP, C_None, C_None)),
  {STC} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {STD} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {STI} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {STOS} (NCh: 2; Ch: (C_MemEDI, C_EDI, C_None)),
  {WAIT} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {XLAT} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {XLATB} (NCh: 1; Ch: (C_EAX, C_None, C_None)),
  {MOVSB} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVSBL} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVSBW} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVSWL} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVZB} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {MOVZWL} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {POPA} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {IN} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {OUT} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LDS} (NCh: 1; Ch: (C_Op2, C_None, C_None)), {NCh was 2 with a single change listed}
  {LCS} (NCh: 1; Ch: (C_Op2, C_None, C_None)), {NCh was 2 with a single change listed}
  {LES} (NCh: 1; Ch: (C_Op2, C_None, C_None)), {NCh was 2 with a single change listed}
  {LFS} (NCh: 1; Ch: (C_Op2, C_None, C_None)), {NCh was 2 with a single change listed}
  {LGS} (NCh: 1; Ch: (C_Op2, C_None, C_None)), {NCh was 2 with a single change listed}
  {LSS} (NCh: 1; Ch: (C_Op2, C_None, C_None)), {NCh was 2 with a single change listed}
  {POPF} (NCh: 2; Ch: (C_Flags, C_ESP, C_None)),
  {SBB} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {ADC} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {DIV} (NCh: 3; Ch: (C_EAX, C_EDX, C_Flags)),
  {ROR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {ROL} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {RCL} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {RCR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {SAL} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {SHLD} (NCh: 2; Ch: (C_Op3, C_Flags, C_None)),
  {SHRD} (NCh: 2; Ch: (C_Op3, C_Flags, C_None)),
  {LCALL} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {LJMP} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {LRET} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {JNAE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNB} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNA} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNBE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JP} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNP} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JPE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JPO} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNGE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNG} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNL} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JNLE} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JCXZ} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {JECXZ} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LOOP} (NCh: 1; Ch: (C_ECX, C_None, C_None)),
  {CMPS} (NCh: 3; Ch: (C_ESI, C_EDI, C_Flags)),
  {INS} (NCh: 1; Ch: (C_EDI, C_None, C_None)),
  {OUTS} (NCh: 1; Ch: (C_ESI, C_None, C_None)),
  {SCAS} (NCh: 2; Ch: (C_EDI, C_Flags, C_None)),
  {BSF} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {BSR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {BT} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {BTC} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {BTR} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {BTS} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {INT} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {INT3} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {INTO} (NCh: 255; Ch: (C_None, C_None, C_None)), {don't know value of any register}
  {BOUNDL} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {BOUNDW} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LOOPZ} (NCh: 1; Ch: (C_ECX, C_None, C_None)),
  {LOOPE} (NCh: 1; Ch: (C_ECX, C_None, C_None)),
  {LOOPNZ} (NCh: 1; Ch: (C_ECX, C_None, C_None)),
  {LOOPNE} (NCh: 1; Ch: (C_ECX, C_None, C_None)),
  {SETO} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNO} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNAE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNB} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETZ} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNZ} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNA} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNBE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETP} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETPE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNP} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETPO} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNGE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNL} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNG} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SETNLE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {ARPL} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {LAR} (NCh: 1; Ch: (C_Op2, C_None, C_None)),
  {LGDT} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LIDT} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LLDT} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LMSW} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {LSL} (NCh: 2; Ch: (C_Op2, C_Flags, C_None)),
  {LTR} (NCh: 0; Ch: (C_None, C_None, C_None)),
  {SGDT} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SIDT} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SLDT} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {SMSW} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {STR} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {VERR} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {VERW} (NCh: 1; Ch: (C_Flags, C_None, C_None)),
  {FABS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FBLD} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FBSTP} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FCLEX} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FNCLEX} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCOS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDECSTP}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDISI} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FNDISI} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDIVR} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FENI} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FNENI} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FFREE} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIADD} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FICOM} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FICOMP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIVR} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIMUL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FINCSTP}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FINIT} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FNINIT} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIST} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISTP} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISUB} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBR} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDCW} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDENV} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDLG2} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDLN2} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDL2E} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDL2T} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDPI} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDZ} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FNOP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FPATAN} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FPREM} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FPREM1} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FPTAN} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FRNDINT}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FRSTOR} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSAVE} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FNSAVE} (NCh: 1; Ch: (C_Op1, C_None, C_None)), {was C_FPU; stores to its operand like FSAVE}
  {FSCALE} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSETPM} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSIN} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSINCOS}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSQRT} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FST} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTCW} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FNSTCW} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTENV} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FNSTENV}(NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTSW} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FNSTSW} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FTST} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FUCOM} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FUCOMP} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FUCOMPP}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FWAIT} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FXAM} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FXTRACT}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FYL2X} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FYL2XP1}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {F2XM1} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FILDQ} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FILDS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FILDL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FLDT} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FISTQ} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISTS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISTL} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTL} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTPS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISTPL} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTPL} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISTPS} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FISTPQ} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FSTPT} (NCh: 1; Ch: (C_Op1, C_None, C_None)),
  {FCOMPS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FICOMPL}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCOMPL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FICOMPS}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCOMS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FICOML} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FCOML} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FICOMS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIADDL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FADDL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIADDS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FISUBL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FISUBS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBR} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBRS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FISUBRL}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FSUBRL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FISUBRS}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FMULS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIMUL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FMULL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIMULS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIVS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIVL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDIVL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIVS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDIVRS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIVRL}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FDIVRL} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {FIDIVRS}(NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {REPE} (NCh: 1; Ch: (C_ECX, C_None, C_None)), {NCh was 0, which left the C_ECX entry uncounted}
  {REPNE} (NCh: 1; Ch: (C_ECX, C_None, C_None)), {NCh was 0, which left the C_ECX entry uncounted}
  {FADDS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {POPFD} (NCh: 2; Ch: (C_ESP, C_Flags, C_None)),
  {below are the MMX instructions}
  {A_EMMS} (NCh: 1; Ch: (C_FPU, C_None, C_None)),
  {A_MOVD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_MOVQ} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PACKSSDW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PACKSSWB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PACKUSWB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDSB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDSW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDUSB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDUSW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PADDW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PAND} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PANDN} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PCMPEQB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PCMPEQD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PCMPEQW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PCMPGTB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PCMPGTD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PCMPGTW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PMADDWD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PMULHW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PMULLW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_POR} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSLLD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSLLQ} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSLLW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSRAD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSRAW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSRLD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSRLQ} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSRLW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBSB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBSW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBUSB} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBUSW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PSUBW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PUNPCKHBW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PUNPCKHDQ} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PUNPCKHWD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PUNPCKLBW} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PUNPCKLDQ} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PUNPCKLWD} (NCh: 255; Ch: (C_FPU, C_None, C_None)),
  {A_PXOR} (NCh: 255; Ch: (C_FPU, C_None, C_None)));
{State shared by the routines nested in ReloadOpt.
 NrOfPaiFast: CreateRegs takes the TPaiProp of the first NrOfPaiFast Pai
 objects from PaiPropBlock and allocates the later ones with New.
 NrOfInstrSinceLastMod: per register counter which CreateRegs increases for
 every Pai object it walks over and which is set back to 0 by DestroyReg and
 when a load sequence of the register is extended. It is only declared for
 R_EAX..R_EDI.
 NOTE(review): NrOfPaiObjs is not used in the visible part of the file}
Var NrOfPaiObjs, NrOfPaiFast: Longint;
PaiPropBlock: PPaiPropBlock;
NrOfInstrSinceLastMod: Array[R_EAX..R_EDI] Of Byte;
Function TCh2Reg(Ch: TChange): TRegister;
{Translates a TChange value into the TRegister with the same ordinal number.
 Only values up to C_EDI stand for a register; anything beyond that raises
 internal error $db}
Begin
  If (Ch > C_EDI)
    Then InternalError($db)
    Else TCh2Reg := TRegister(Byte(Ch))
End;
Procedure DestroyReg(p1: pai; Reg: TRegister);
{Destroys the contents of the register Reg in the PPaiProp of p1: the content
 record of the 32 bit form of Reg is cleared (so its type becomes
 con_Unknown) and its State is increased by one, so that the contents before
 and after can be told apart.
 Registers outside R_EAX..R_EDI are not tracked and are silently ignored}
Var TmpState: Longint;
Begin
  Reg := Reg32(Reg);
  If (Reg >= R_EAX) And (Reg <= R_EDI)
    Then
      Begin
        {NrOfInstrSinceLastMod only has elements for R_EAX..R_EDI, so it may
         only be indexed once the range check has passed; resetting it in
         front of the check wrote outside the array for untracked registers}
        NrOfInstrSinceLastMod[Reg] := 0;
        {the state has to survive the FillChar below}
        TmpState := PPaiProp(p1^.line)^.Regs[Reg].State+1;
        FillChar(PPaiProp(p1^.line)^.Regs[Reg], SizeOf(TContent), 0);
        PPaiProp(p1^.line)^.Regs[Reg].State := TmpState;
      End;
End;
(*Function FindZeroreg(p: Pai; Var Result: TRegister): Boolean;
{Finds a register which contains the constant zero}
Var Counter: TRegister;
Begin
Counter := R_EAX;
FindZeroReg := True;
While (Counter <= R_EDI) And
((PPaiProp(p^.line)^.Regs[Counter].Typ <> Con_Const) or
(PPaiProp(p^.line)^.Regs[Counter].StartMod <> Pointer(0))) Do
Inc(Byte(Counter));
If (PPaiProp(p^.line)^.Regs[Counter].Typ = Con_Const) And
(PPaiProp(p^.line)^.Regs[Counter].StartMod = Pointer(0))
Then Result := Counter
Else FindZeroReg := False;
End;*)
Procedure DestroyRefs(p: pai; Const Ref: TReference; WhichRegNot: TRegister);
{Destroys, in the PPaiProp of p, all registers of R_EAX..R_EDI which possibly
 hold the contents of the memory location described by Ref. Only registers
 whose content type is Con_Ref are candidates, and WhichRegNot (converted
 with Reg32) is always left alone. Three situations are distinguished:
 - Ref is based on ProcInfo.FramePointer
 - Ref has another base and/or an index register
 - Ref has neither base nor index
 The switch cs_UncertainOpts makes the routine keep more registers alive.
 NOTE(review): the compound conditions dereference StartMod only behind the
 test typ = Con_Ref, so they presumably rely on short circuit boolean
 evaluation - confirm the compiler setting}
Var Counter: TRegister;
Begin
WhichRegNot := Reg32(WhichRegNot);
If (Ref.base <> R_NO) Or
(Ref.index <> R_NO)
Then
Begin
If (Ref.base = ProcInfo.FramePointer)
Then
{write something to a parameter or a local variable: a register is
 destroyed when the sequence that loaded it starts with an instruction
 whose first operand is a reference and either that reference equals Ref,
 or uncertain optimizations are off and the sequence consists of more than
 one instruction}
For Counter := R_EAX to R_EDI Do
With PPaiProp(p^.line)^.Regs[Counter] Do
Begin
If (Counter <> WhichRegNot) And
(typ = Con_Ref) And
(Pai(StartMod)^.typ = ait_instruction) And
(Pai386(StartMod)^.op1t = top_ref) And
(RefsEqual(TReference(Pai386(StartMod)^.op1^), Ref) Or
(Not(cs_UncertainOpts in AktSwitches) And
(NrOfMods <> 1)))
Then DestroyReg(p, Counter)
End
Else
{writing something to a pointer location: with uncertain optimizations on
 and a base other than R_EDI, a register survives when it was loaded by a
 single instruction whose first operand is a reference based on the frame
 pointer}
For Counter := R_EAX to R_EDI Do
With PPaiProp(p^.line)^.Regs[Counter] Do
If (Counter <> WhichRegNot) And
(typ = Con_Ref) And
(Not(cs_UncertainOpts in AktSwitches) Or
(Ref.Base = R_EDI) Or
(Not((NrOfMods = 1) And
(Pai(StartMod)^.typ = ait_instruction) And
(Pai386(StartMod)^.op1t = top_ref) And
(PReference(Pai386(StartMod)^.op1)^.base = ProcInfo.FramePointer))))
Then
DestroyReg(p, Counter) {we don't know what memory location the reference points to,
so we just destroy every register which contains a memory
reference}
End
Else {the ref is a var name or we just have a reference to an absolute offset}
Begin
{with uncertain optimizations on, only the registers loaded from exactly
 this reference are destroyed; otherwise every register holding a memory
 reference is}
For Counter := R_EAX to R_EDI Do
If (Counter <> WhichRegNot) And
(PPaiProp(p^.line)^.Regs[Counter].typ = Con_Ref) And
(Not(cs_UncertainOpts in AktSwitches) Or
RefsEqual(Ref,
TReference(Pai386(PPaiProp(p^.line)^.Regs[Counter].StartMod)^.op1^))) Then
DestroyReg(p, Counter)
End;
End;
{$IfDef OptimizeMovs}
Function OpsEqual(typ: Longint; op1, op2: Pointer): Boolean;
{Compares two operands that share the operand type typ.
 - registers and constants are compared by their raw operand value
 - references are compared with RefsEqual
 - two absent operands (Top_None) are equal
 - operands of any other type never compare equal}
Begin
  If (typ = Top_Reg) Or (typ = Top_Const) Then
    OpsEqual := op1 = op2
  Else If (typ = Top_Ref) Then
    OpsEqual := RefsEqual(TReference(op1^), TReference(op2^))
  Else
    OpsEqual := (typ = Top_None);
End;
Function RegsSameContent(p1, p2: Pai; Reg: TRegister): Boolean;
{True when the 32 bit form of Reg carries the same State number in the
 PPaiProp of p1 as in the PPaiProp of p2, i.e. when the register has not been
 modified between the two}
Var FullReg: TRegister;
    State1, State2: Word;
Begin
  FullReg := Reg32(Reg);
  State1 := PPaiProp(p1^.line)^.Regs[FullReg].State;
  State2 := PPaiProp(p2^.line)^.Regs[FullReg].State;
  RegsSameContent := (State1 = State2);
End;
Function InstructionsEqual(p1, p2: Pai): Boolean;
{Checks whether p1 and p2 are both Pai386 instructions with the same opcode,
 the same types for operand 1 and 2 and equal operands (see OpsEqual).
 False is returned when either pointer is nil or either object is not an
 ait_instruction}
Begin
  InstructionsEqual :=
    Assigned(p1) And Assigned(p2) And
    (Pai(p1)^.typ = ait_instruction) And
    {p2 has to be verified as well before it is read as a Pai386; the second
     test used to repeat the check of p1}
    (Pai(p2)^.typ = ait_instruction) And
    (Pai386(p1)^._operator = Pai386(p2)^._operator) And
    (Pai386(p1)^.op1t = Pai386(p2)^.op1t) And
    (Pai386(p1)^.op2t = Pai386(p2)^.op2t) And
    OpsEqual(Pai386(p1)^.op1t, Pai386(p1)^.op1, Pai386(p2)^.op1) And
    OpsEqual(Pai386(p1)^.op2t, Pai386(p1)^.op2, Pai386(p2)^.op2)
End;
Function CheckSequence(p: Pai; Reg: TRegister; Var Found: Longint): Boolean;
{checks whether the current instruction sequence (starting with p) and the
one between StartMod and EndMod of Reg are the same. If so, the number of
instructions that match is stored in Found and true is returned, otherwise
Found holds the number of instructions between StartMod and EndMod and false
is returned.
StartMod and NrOfMods are taken from the PPaiProp of the object in front of
p (p^.last); EndMod is found by following Next NrOfMods-1 times from
StartMod. When nothing matched at all, Found stays 0}
Var hp2, hp3, EndMod: Pai;
TmpResult: Boolean;
RegsNotYetChecked: Set Of TRegister;
Counter: Byte;
Function NoChangedRegInRef(oldp, newp: Pai): Boolean;
Var TmpP: Pai;
{checks if the first operator of newp is a reference and in that case checks
whether that reference includes regs that have been changed since oldp. This
to avoid wrong optimizations like
movl 8(%ebp), %eax movl 8(%ebp), %eax
movl 12(%ebp), %edx movl 12(%ebp), %edx
movl (%eax,%edx,1), %edi movl (%eax,%edx,1), %edi
pushl %edi being converted to pushl %edi
movl 8(%ebp), %eax movl 16(%ebp), %edx
movl 16(%ebp), %edx pushl %edi
movl (%eax,%edx,1), %edi
pushl %edi
because first is checked whether %eax isn't changed (it isn't) and
consequently all instructions containing %eax are removed.
The routine works on TmpResult and RegsNotYetChecked of CheckSequence: every
register is compared only once per CheckSequence call and is taken out of
RegsNotYetChecked as soon as it has been looked at}
Begin
TmpResult := True;
If (Pai(oldp)^.typ = ait_instruction) Then {oldp and newp are the same instruction}
Case Pai386(oldp)^.op1t Of
Top_Reg:
If (Reg32(TRegister(Pai386(oldp)^.op1)) in RegsNotYetChecked) Then
Begin
RegsNotYetChecked := RegsNotYetChecked - [Reg32(TRegister(Pai386(oldp)^.op1))];
If Assigned(newp^.Last)
Then
Begin
{walk back from the object in front of newp over everything that has
 already been flagged as removable, then compare the state of the
 register there with its state at oldp}
TmpP := Pai(newp^.last);
While Assigned (TmpP^.Last) And
PPaiProp(TmpP^.Line)^.CanBeRemoved Do
TmpP := Pai(TmpP^.Last);
TmpResult := Assigned(TmpP) And
RegsSameContent(oldp, TmpP, Reg32(TRegister(Pai386(oldp)^.op1)))
End
Else TmpResult := False;
End;
Top_Ref:
With TReference(Pai386(oldp)^.op1^) Do
Begin
{same test as above, first for the base register of the reference}
If (Base in RegsNotYetChecked) And
(Base <> R_NO) Then
Begin
RegsNotYetChecked := RegsNotYetChecked - [Base];
If Assigned(newp^.Last)
Then
Begin
TmpP := Pai(newp^.last);
While Assigned (TmpP^.Last) And
PPaiProp(TmpP^.Line)^.CanBeRemoved Do
TmpP := Pai(TmpP^.Last);
TmpResult := Assigned(TmpP) And
RegsSameContent(oldp, TmpP, Base)
End
Else TmpResult := False;
End;
{... and then for the index register, unless the base already failed}
If TmpResult And
(Index <> R_NO) And
(Index in RegsNotYetChecked) Then
Begin
RegsNotYetChecked := RegsNotYetChecked - [Index];
If Assigned(newp^.Last)
Then
Begin
TmpP := Pai(newp^.last);
While Assigned (TmpP^.Last) And
PPaiProp(TmpP^.Line)^.CanBeRemoved Do
TmpP := Pai(TmpP^.Last);
TmpResult := Assigned(TmpP) And
RegsSameContent(oldp, TmpP, Index)
End
Else TmpResult := False;
End;
End;
End;
NoChangedRegInRef := TmpResult;
End;
Begin {CheckSequence}
Reg := Reg32(Reg);
Found := 0;
{hp2 runs over the new sequence, hp3 over the recorded one}
hp2 := p;
hp3 := PPaiProp(Pai(p^.last)^.line)^.Regs[Reg].StartMod;
EndMod := PPaiProp(Pai(p^.last)^.line)^.Regs[Reg].StartMod;
RegsNotYetChecked := [R_EAX..R_EDI];
{move EndMod to the last instruction of the recorded sequence}
For Counter := 2 to PPaiProp(Pai(p^.last)^.line)^.Regs[Reg].NrOfMods Do
EndMod := Pai(EndMod^.Next);
{compare pairwise until the whole recorded sequence has matched or a
 difference shows up}
While (Found <> PPaiProp(Pai(p^.last)^.line)^.Regs[Reg].NrOfMods) And
InstructionsEqual(hp2, hp3) And
NoChangedRegInRef(EndMod, hp2) Do
Begin
hp2 := Pai(hp2^.next);
hp3 := Pai(hp3^.next);
Inc(Found)
End;
If (Found <> PPaiProp(Pai(p^.last)^.line)^.Regs[Reg].NrOfMods)
Then
Begin
CheckSequence := False;
If (found > 0) then
{this is correct because we only need to turn off the CanBeRemoved flag
when an instruction has already been processed by CheckSequence
(otherwise CanBeRemoved can't be true, or can't have to be turned off).
If it has already been processed by checkSequence and flagged to be
removed, it means that it has been checked against a previous sequence
and that it was equal (otherwise CheckSequence would have returned false
and the instruction wouldn't have been removed). If this "If found > 0"
check is left out, incorrect optimizations are performed.}
{note that NrOfMods is read from the PPaiProp of p itself here, not from
 the one of p^.last as everywhere else in this routine}
Found := PPaiProp(Pai(p)^.line)^.Regs[Reg].NrOfMods
End
Else CheckSequence := True;
End; {CheckSequence}
{$Endif OptimizeMovs}
Procedure DestroyAllRegs(p: Pai);
{Calls DestroyReg for each register from R_EAX up to R_EDI, which both
 initializes and destroys the register contents in the PPaiProp of p}
Var CurReg: TRegister;
Begin
  For CurReg := R_EAX To R_EDI Do
    Begin
      DestroyReg(p, CurReg)
    End
End;
Procedure Destroy(PaiObj: Pai; opt: Longint; Op: Pointer);
{Destroys whatever the operand Op of type opt stands for in the PPaiProp of
 PaiObj: a register operand destroys that register, a reference operand
 destroys every register that may hold the referenced memory. Symbols and
 all other operand types change nothing}
Begin
  If (opt = top_reg) Then
    DestroyReg(PaiObj, TRegister(Op))
  Else If (opt = top_ref) Then
    DestroyRefs(PaiObj, TReference(Op^), R_NO)
End;
Function CreateRegs(First: Pai): Pai;
{Creates the register contents information for all pai objects starting with
 First, up to the end of the list. Every object that is visited gets its own
 TPaiProp, which starts as a copy of the one of the previous object (all
 zeroes for First) and is then updated according to what the object does.
 The pointer to the TPaiProp is stored in the line field of the object; the
 original value of that field is kept in LineSave.
 Returns the last pai which has been processed, or First (nil) when the list
 is empty}
Var
  TmpProp: PPaiProp;
  Cnt, InstrCnt: Longint;
  InstrProp: TAsmInstrucProp;
  p: Pai;
  TmpRef: TReference;
  TmpReg: TRegister;
Begin
  {keep the result defined when the loop below is never entered}
  CreateRegs := First;
  p := First;
  InstrCnt := 1;
  FillChar(NrOfInstrSinceLastMod, SizeOf(NrOfInstrSinceLastMod), 0);
  While Assigned(p) Do
    Begin
      CreateRegs := p;
      {the first NrOfPaiFast objects use a slot of the preallocated block,
       the remaining ones get a separately allocated record}
      If (InstrCnt <= NrOfPaiFast)
        Then TmpProp := @PaiPropBlock^[InstrCnt]
        Else New(TmpProp);
      {start from the state as it was after the previous object}
      If (p <> First)
        Then TmpProp^ := PPaiProp(Pai(p^.last)^.line)^
        Else FillChar(TmpProp^, SizeOf(TmpProp^), 0);
      TmpProp^.LineSave := p^.line;
      PPaiProp(p^.line) := TmpProp;
      For TmpReg := R_EAX To R_EDI Do
        Inc(NrOfInstrSinceLastMod[TmpReg]);
      Case p^.typ Of
        ait_label: DestroyAllRegs(p);
        ait_labeled_instruction, ait_stabs, ait_stabn,
        ait_stab_function_name:; {nothing changes}
        ait_instruction:
          Begin
            InstrProp := AsmInstr[Pai386(p)^._operator];
            Case Pai386(p)^._operator Of
{$IfDef OptimizeMovs}
              A_MOV, A_MOVZX, A_MOVSX:
                Begin
                  Case Pai386(p)^.op1t Of
                    Top_Reg:
                      Case Pai386(p)^.op2t Of
                        Top_Reg:
                          Begin
                            DestroyReg(p, TRegister(Pai386(p)^.op2));
{                           TmpProp^.Regs[TRegister(Pai386(p)^.op2)] :=
                              TmpProp^.Regs[TRegister(Pai386(p)^.op1)];
                            If (TmpProp^.Regs[TRegister(Pai386(p)^.op2)].ModReg = R_NO) Then
                              TmpProp^.Regs[TRegister(Pai386(p)^.op2)].ModReg :=
                                Tregister(Pai386(p)^.op1);}
                          End;
                        Top_Ref: DestroyRefs(p, TReference(Pai386(p)^.op2^), TRegister(Pai386(p)^.op1));
                      End;
                    Top_Ref:
                      Begin {destination is always a register in this case}
                        TmpReg := Reg32(TRegister(Pai386(p)^.op2));
                        If (RegInRef(TmpReg, TReference(Pai386(p)^.op1^)))
                          Then
                            {the register is loaded through itself: this
                             instruction extends the current sequence}
                            Begin
                              With PPaiProp(Pai(p)^.line)^.Regs[TmpReg] Do
                                Begin
                                  Inc(State);
                                  If (typ <> Con_Ref) Then
                                    Begin
                                      typ := Con_Ref;
                                      StartMod := p;
                                    End;
                                  Inc(NrOfMods, NrOfInstrSinceLastMod[TmpReg]);
{also store how many instructions are part of the sequence in the first
 instructions PPaiProp, so it can be easily accessed from within
 CheckSequence}
                                  PPaiProp(Pai(StartMod)^.line)^.Regs[TmpReg].NrOfMods := NrOfMods;
                                  NrOfInstrSinceLastMod[TmpReg] := 0;
                                End;
                            End
                          Else
                            {a load that doesn't depend on the old contents of
                             the register: a new sequence starts here}
                            Begin
                              DestroyReg(p, TmpReg);
                              With PPaiProp(Pai(p)^.line)^.Regs[TmpReg] Do
                                Begin
                                  Typ := Con_Ref;
                                  StartMod := p;
                                  NrOfMods := 1;
                                End;
                            End;
                      End;
                    Top_Const:
                      Begin
                        Case Pai386(p)^.op2t Of
                          Top_Reg:
                            Begin
                              TmpReg := Reg32(TRegister(Pai386(p)^.op2));
                              With TmpProp^.Regs[TmpReg] Do
                                Begin
{it doesn't matter that the state is changed,
 it isn't looked at when removing constant reloads}
                                  DestroyReg(p, TmpReg);
                                  typ := Con_Const;
                                  StartMod := Pai386(p)^.op1;
                                End
                            End;
                          Top_Ref: DestroyRefs(P, TReference(Pai386(p)^.op2^), R_NO);
                        End;
                      End;
                    {a source operand of any other kind (e.g. a symbol) still
                     overwrites the destination; nothing is known about the
                     new value, so simply forget the old one instead of
                     leaving stale contents behind}
                    Else Destroy(p, Pai386(p)^.op2t, Pai386(p)^.op2)
                  End;
                End;
{$EndIf OptimizeMovs}
              A_IMUL:
                Begin
                  If (Pai386(p)^.Op3t = top_none)
                    Then
                      If (Pai386(p)^.Op2t = top_none)
                        Then
                          {one operand form: destroys EAX and EDX}
                          Begin
                            DestroyReg(p, R_EAX);
                            DestroyReg(p, R_EDX)
                          End
                        Else
                          {two operand form: op2 is the destination}
                          Begin
                            If (Pai386(p)^.Op2t = top_reg) Then
                              DestroyReg(p, TRegister(Pai386(p)^.Op2));
                          End
                    {three operand form: the destination register is stored in
                     the second word of op2}
                    Else If (Pai386(p)^.Op3t = top_reg) Then
                      DestroyReg(p, TRegister(longint(twowords(Pai386(p)^.Op2).word2)));
                End;
              A_XOR:
                Begin
                  If (Pai386(p)^.op1t = top_reg) And
                     (Pai386(p)^.op2t = top_reg) And
                     (Pai386(p)^.op1 = Pai386(p)^.op2)
                    Then
                      {xor reg, reg: the register now contains the constant 0}
                      Begin
                        DestroyReg(p, Tregister(Pai386(p)^.op1));
                        TmpProp^.Regs[Reg32(Tregister(Pai386(p)^.op1))].typ := Con_Const;
                        TmpProp^.Regs[Reg32(Tregister(Pai386(p)^.op1))].StartMod := Pointer(0)
                      End
                    Else Destroy(p, Pai386(p)^.op2t, Pai386(p)^.op2);
                End
              Else
                {all other instructions: use the list of things the
                 instruction changes from the AsmInstr table}
                Begin
                  If InstrProp.NCh <> 255
                    Then
                      For Cnt := 1 To InstrProp.NCh Do
                        Case InstrProp.Ch[Cnt] Of
                          C_None:;
                          C_Op1: Destroy(p, Pai386(p)^.op1t, Pai386(p)^.op1);
                          C_Op2: Destroy(p, Pai386(p)^.op2t, Pai386(p)^.op2);
                          {the kind of the third operand is given by op3t (not
                           op2t) and its register is stored in the second word
                           of op2, exactly as for A_IMUL above}
                          C_Op3:
                            If (Pai386(p)^.op3t = top_reg) Then
                              DestroyReg(p, TRegister(Longint(TwoWords(Pai386(p)^.op2).word2)));
                          C_MemEDI:
                            Begin
                              FillChar(TmpRef, SizeOf(TmpRef), 0);
                              TmpRef.Base := R_EDI;
                              DestroyRefs(p, TmpRef, R_NO)
                            End;
                          C_EAX..C_EDI: DestroyReg(p, TCh2Reg(InstrProp.Ch[Cnt]));
                          C_Flags, C_FPU:;
                        End
                    Else
                      {NCh = 255: no usable change list, so assume that
                       everything is destroyed}
                      Begin
                        DestroyAllRegs(p);
                      End;
                End;
            End;
          End
        Else
          {any other kind of pai object: assume that everything is destroyed}
          Begin
            DestroyAllRegs(p);
          End;
      End;
      Inc(InstrCnt);
      p := Pai(p^.next);
    End;
End;
Procedure OptimizeBlock(First, Last: Pai);
{Walks the pai objects from First up to and including Last and marks the
instructions that can be removed by RemoveInstructs (by setting CanBeRemoved
in their PPaiProp). They're not removed immediately because sometimes an
instruction needs to be checked in two different sequences.
Three kinds of instructions are looked at:
- mov ref, reg (only when OptimizeMovs is defined): reloads of a sequence
that CheckSequence reports as already performed
- mov const, reg (only when OptimizeMovs is defined): reloads of the same
constant
- xor reg, reg: clearing a register that is already known to contain 0}
Var Cnt, Cnt2: Longint;
p, hp1, hp2: Pai;
Begin
p := First;
While (p <> Pai(Last^.Next)) Do
Begin
Case p^.typ Of
ait_label, ait_labeled_instruction:;
ait_instruction:
Begin
Case Pai386(p)^._operator Of
{$IfDef OptimizeMovs}
A_MOV{, A_MOVZX, A_MOVSX}:
Begin
Case Pai386(p)^.op1t Of
{ Top_Reg:
Case Pai386(p)^.op2t Of
Top_Reg:;
Top_Ref:;
End;}
Top_Ref:
Begin {destination is always a register in this case}
With PPaiProp(p^.line)^.Regs[Reg32(Tregister(Pai386(p)^.op2))] Do
Begin
{the contents of the destination register as they were before this
instruction are looked up in the PPaiProp of the previous pai object}
If Assigned(p^.last) And
(PPaiProp(Pai(p^.last)^.line)^.Regs[Reg32(TRegister(Pai386(p)^.op2))].typ = con_ref) Then
{so we don't try to check a sequence when the register only contains a constant}
{NOTE(review): Cnt is an output of CheckSequence; it looks like the number of
pai objects that make up the sequence - confirm against CheckSequence}
If CheckSequence(p, TRegister(Pai386(p)^.op2), Cnt) And
(Cnt > 0)
Then
Begin
hp1 := nil;
{although it's perfectly ok to remove an instruction which doesn't contain
the register that we've just checked (CheckSequence takes care of that),
the sequence containing this other register should also be completely
checked and removed, otherwise we may get situations like this:
movl 12(%ebp), %edx movl 12(%ebp), %edx
movl 16(%ebp), %eax movl 16(%ebp), %eax
movl 8(%edx), %edx movl 8(%edx), %edx
movl (%eax), eax movl (%eax), eax
cmpl %eax, %edx cmpl %eax, %edx
jnz l123 getting converted to jnz l123
movl 12(%ebp), %edx movl 4(%eax), eax
movl 16(%ebp), %eax
movl 8(%edx), %edx
movl 4(%eax), eax}
{hp2 keeps pointing to the instruction that started the sequence, because p
is advanced in the loop below}
hp2 := p;
For Cnt2 := 1 to Cnt Do
Begin
{debug info entries are stepped over without being flagged}
If Not(Pai(p)^.typ In [ait_stabs, ait_stabn, ait_stab_function_name]) Then
Begin
{hp1 = the first object of the sequence that doesn't use the checked
register; the scan is resumed from there afterwards}
If (hp1 = nil) And
Not(RegInInstruction(Tregister(Pai386(hp2)^.op2), p))
Then hp1 := p;
PPaiProp(p^.line)^.CanBeRemoved := True;
End;
p := Pai(p^.next);
End;
If hp1 <> nil Then p := hp1;
{p has already been set to the next object to look at, so skip the
"p := Pai(p^.next)" at the end of the main loop}
Continue;
End
Else
{no (complete) match, but this instruction was flagged while checking an
earlier sequence: clear the flag again for every one of the next Cnt objects
that uses the register}
If (Cnt > 0) And
(PPaiProp(p^.line)^.CanBeRemoved) Then
Begin
hp2 := p;
For Cnt2 := 1 to Cnt Do
Begin
If RegInInstruction(Tregister(Pai386(hp2)^.op2), p)
Then PPaiProp(p^.Line)^.CanBeRemoved := False;
p := Pai(p^.Next)
End;
{as above: p already points to the next object to look at}
Continue;
End;
End;
End;
Top_Const:
Begin
Case Pai386(p)^.op2t Of
Top_Reg:
Begin
{the mov is redundant if, before this instruction, the register was already
recorded as containing the same constant (CreateRegs stores the constant
operand in StartMod)}
If Assigned(p^.last) Then
With PPaiProp(Pai(p^.last)^.line)^.Regs[Reg32(TRegister(Pai386(p)^.op2))] Do
If (Typ = Con_Const) And
(StartMod = Pai386(p)^.op1) Then
PPaiProp(p^.line)^.CanBeRemoved := True;
End;
Top_Ref:;
End;
End;
End;
End;
{$EndIf OptimizeMovs}
A_XOR:
Begin
{xor reg, reg is redundant if, before this instruction, the register was
already recorded as containing the constant 0 (con_const with
StartMod = Pointer(0), which is what CreateRegs records for xor reg, reg)}
If (Pai386(p)^.op1t = top_reg) And
(Pai386(p)^.op2t = top_reg) And
(Pai386(p)^.op1 = Pai386(p)^.op2) And
Assigned(p^.last) And
(PPaiProp(Pai(p^.last)^.line)^.Regs[Reg32(Tregister(Pai386(p)^.op1))].typ = con_const) And
(PPaiProp(Pai(p^.last)^.line)^.Regs[Reg32(Tregister(Pai386(p)^.op1))].StartMod = Pointer(0))
Then PPaiProp(p^.line)^.CanBeRemoved := True
End
End
End;
End;
p := Pai(p^.next);
End;
End;
Procedure RemoveInstructs(First, Last: Pai);
{Removes the instructions from First up to and including Last that have been
 marked (CanBeRemoved) from AsmL and disposes them. For the other
 instructions the saved line number (LineSave) is restored. The PPaiProps
 that were allocated separately (those of the objects after the first
 NrOfPaiFast ones) are disposed along the way and finally the PaiPropBlock
 itself is freed}
Var p, hp1, StopAt: Pai;
    TmpLine, InstrCnt: Longint;
Begin
  {Last itself can be one of the instructions that get removed and disposed
   below, so its successor has to be fetched before anything is freed instead
   of reading Last^.Next again on every iteration}
  If Assigned(Last)
    Then StopAt := Pai(Last^.Next)
    Else StopAt := Nil;
  p := First;
  InstrCnt := 1;
  While (p <> StopAt) Do
    If PPaiProp(p^.line)^.CanBeRemoved
      Then
        Begin
          {only the PPaiProps that don't live in the PaiPropBlock have to be
           disposed separately}
          If (InstrCnt > NrOfPaiFast) Then
            Dispose(PPaiProp(p^.Line));
          {get the successor before p is unlinked and destroyed}
          hp1 := Pai(p^.Next);
          AsmL^.Remove(p);
          Dispose(p, Done);
          p := hp1;
          Inc(InstrCnt)
        End
      Else
        Begin
          If (InstrCnt > NrOfPaiFast)
            Then
              Begin
                {save the line number before the record holding it is freed}
                TmpLine := PPaiProp(p^.Line)^.LineSave;
                Dispose(PPaiProp(p^.Line));
                p^.Line := TmpLine;
              End
            Else p^.Line := PPaiProp(p^.Line)^.LineSave;
          p := Pai(p^.Next);
          Inc(InstrCnt)
        End;
  {the size has to be the same as the one used by InitReloadOpt for GetMem}
  If (NrOfPaiFast > 0) Then
{$IfDef TP}
    Freemem(PaiPropBlock, NrOfPaiFast*(((SizeOf(TPaiProp)+1)div 2)*2))
{$Else}
    FreeMem(PaiPropBlock, NrOfPaiFast*(((SizeOf(TPaiProp)+3)div 4)*4))
{$EndIf TP}
End;
Function InitReloadOpt(AsmL: PAasmOutput): Boolean;
{Counts the pai objects of AsmL (result in NrOfPaiObjs) and reserves the
 memory for their PPaiProps in one big memory block (PaiPropBlock, with room
 for NrOfPaiFast records). When not using TP the block holds all of them.
 When using TP the block is limited to what fits in the largest free block
 (with a maximum of 65520 bytes).
 Returns False if the list is empty (there's nothing to optimize) or, when
 using TP, if not enough memory is available for the optimizer in all cases.
 No memory has been reserved when False is returned}
Var p: Pai;
Begin
  P := Pai(AsmL^.First);
  {an empty list would otherwise be counted as one object, after which the
   caller would start working with a block end that doesn't exist}
  If Not Assigned(P) Then
    Begin
      NrOfPaiObjs := 0;
      NrOfPaiFast := 0;
      InitReloadOpt := False;
      Exit;
    End;
  {count the objects from First up to and including Last}
  NrOfPaiObjs := 1;
  While (P <> Pai(AsmL^.Last)) Do
    Begin
      Inc(NrOfPaiObjs);
      P := Pai(P^.next)
    End;
{$IfDef TP}
  If (MemAvail < (SizeOf(TPaiProp)*NrOfPaiObjs))
    {this doesn't have to be one contiguous block}
    Then InitReloadOpt := False
    Else
      Begin
        InitReloadOpt := True;
        {the record size is rounded up to a multiple of 2}
        If (MaxAvail < 65520)
          Then NrOfPaiFast := MaxAvail Div (((SizeOf(TPaiProp)+1) div 2)*2)
          Else NrOfPaiFast := 65520 Div (((SizeOf(TPaiProp)+1) div 2)*2);
        If (NrOfPaiFast > 0) Then
          GetMem(PaiPropBlock, NrOfPaiFast*(((SizeOf(TPaiProp)+1) div 2)*2));
      End;
{$Else}
{Uncomment the next line to see how much memory the reloading optimizer needs}
{ Writeln((NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4)));}
{no need to check mem/maxavail, we've got as much virtual memory as we want}
  {the record size is rounded up to a multiple of 4}
  GetMem(PaiPropBlock, NrOfPaiObjs*(((SizeOf(TPaiProp)+3)div 4)*4));
  InitReloadOpt := True;
  NrOfPaiFast := NrOfPaiObjs;
{$EndIf TP}
End;
Var BlockEnd: Pai;
Begin {ReloadOpt}
{the optimizer is only run when InitReloadOpt reports success}
If InitReloadOpt(AsmL)
Then
Begin
{step 1: create the register contents information for the whole list;
CreateRegs returns the last pai object it has processed}
BlockEnd := CreateRegs(Pai(AsmL^.First));
{step 2: mark the instructions that can be removed}
OptimizeBlock(Pai(AsmL^.First), BlockEnd);
{step 3: remove the marked instructions and release the PPaiProps}
RemoveInstructs(Pai(AsmL^.First), BlockEnd)
End;
End;
{
$Log: aopt386.inc,v $
Revision 1.1.1.1 1998/03/25 11:18:12 root
* Restored version
Revision 1.22 1998/03/24 21:48:29 florian
* just a couple of fixes applied:
- problem with fixed16 solved
- internalerror 10005 problem fixed
- patch for assembler reading
- small optimizer fix
- mem is now supported
Revision 1.21 1998/03/11 15:45:35 florian
* -Oa problem solved
Revision 1.20 1998/03/10 01:17:13 peter
* all files have the same header
* messages are fully implemented, EXTDEBUG uses Comment()
+ AG... files for the Assembler generation
Revision 1.19 1998/03/09 16:46:27 jonas
* fixed bug with uncertain optimizations when moving data among variables using movsl
Revision 1.18 1998/03/04 16:42:00 jonas
* bugfix in destroyrefs and fixed a potential bug in createregs
Revision 1.17 1998/03/03 20:33:29 jonas
* TContent record now only occupies 8 bytes anymore
Revision 1.15 1998/03/03 01:08:13 florian
* bug0105 and bug0106 problem solved
Revision 1.14 1998/03/02 21:35:16 jonas
* added comments from last update
Revision 1.13 1998/03/02 21:29:06 jonas
* redesigned TContent record so it occupies only 13 bytes anymore (was about 18)
* store TPaiProps of 16 and 8 bit registers in those of the 32 bit regs
* fixed a small bug which prevented some optimizes from being performed
* store TPaiProps in one big array instead of in separate records
* warning: TP version not tested because I only have TP, not BP (-> no protected mode apps)
Revision 1.12 1998/02/24 21:18:13 jonas
* file name back to lower case
Revision 1.4 1998/02/24 20:32:12 jonas
* added comments from latest commit
Revision 1.3 1998/02/24 20:27:51 jonas
* if a register is being written to memory, its contents aren't destroyed
(wherever it's been written to, its contents are up-to-date)
* changed the order in which some functions/procedure are defined, because some
of them are now used by aopt386.pas
Revision 1.11 1998/02/19 22:46:54 peter
* Fixed linebreaks
Revision 1.10 1998/02/13 10:34:31 daniel
* Made Motorola version compilable.
* Fixed optimizer
Revision 1.9 1998/02/12 17:18:49 florian
* fixed to get remake3 work, but needs additional fixes (output, I don't like
also that aktswitches isn't a pointer)
Revision 1.8 1998/02/12 11:49:37 daniel
Yes! Finally! After three retries, my patch!
Changes:
Complete rewrite of psub.pas.
Added support for DLL's.
Compiler requires less memory.
Platform units for each platform.
Revision 1.7 1998/02/07 10:11:19 michael
* RefsEqual made less harsh:
* when something is written to x(%ebp), registers which contain
a pointer that isn't "x(%ebp)"-based isn't destroyed
* when something is written to a pointer location, registers
which contain the contents of x(%ebp) aren't destroyed
Revision 1.6 1998/01/12 17:45:20 jonas
* merged DisposeProps and RemoveInstructs procedures (speed!)
Revision 1.5 1998/01/11 22:51:30 jonas
* back to unix linebreaks...(hate it! :)
Revision 1.4 1998/01/11 22:50:10 jonas
* all floating point store operations now change op1 instead of the fpu regs
Revision 1.3 1998/01/11 14:40:04 jonas
* bugfix in optimize procedure (too many instructions were removed in certain cases)
Revision 1.1 1997/12/30 21:10:34 jonas
* changed back to unix/linux line breaks
Pre-CVS log:
JM Jonas Maebe
+ feature added
- removed
* bug fixed or changed
History (started on 2nd December 1997):
2nd December 1997:
+ initial version (JM)
+ removes redundant "xor %reg, %reg"'s (JM)
3rd December 1997:
+ removes certain redundant movs (still bugged) (JM)
* A_REP now destroys ECX
4th December 1997:
* fixed bugs in mov-removal (still bugged) (JM)
5th December 1997:
* fixed more bugs in mov-removal (a compiler compiled with these
optimizations now can compile itself successfully!) and enhanced
it (introducing new bugs, which have to be fixed again...) (JM)
* A_AND and A_OR now destroy op2 instead of op1 <g> (JM)
6th December 1997:
* A_PUSHAD now only destroys ESP instead of all registers (JM)
* A_REPE and A_REPNE now also destroy ECX (JM)
* Rewrote some procedures so it's a bit more modular and easier/
cleaner/possible to do some optimizations, but it's slower (JM)
* enabled mov-reloading optimization for A_MOVZX and A_MOVSX
(actually it's already 7 December, 1:25 am in the mean time :) (JM)
7th December 1997:
* All instructions okayed by CheckSequence are now being removed (JM)
To Do:
* special case for A_XCHG
* implementation of ModReg comparing
* special case for lea
* fpu optimizing
* active optimizing (ie. change certain register allocations)
* make DestroyRefs a little less harsh
* bug fixes?
}